library(dtwclust)
library(dtw)
df<-read.csv("../clean_data/gap_list_year.csv")
df
df <- subset(df, select = -c(X) )
df
colnames(df)[colnames(df) == "X2002"] = "2002"
colnames(df)[colnames(df) == "X2003"] = "2003"
colnames(df)[colnames(df) == "X2005"] = "2005"
colnames(df)[colnames(df) == "X2007"] = "2007"
colnames(df)[colnames(df) == "X2009"] = "2009"
colnames(df)[colnames(df) == "X2011"] = "2011"
colnames(df)[colnames(df) == "X2013"] = "2013"
colnames(df)[colnames(df) == "X2015"] = "2015"
colnames(df)[colnames(df) == "X2017"] = "2017"
colnames(df)[colnames(df) == "X2019"] = "2019"
colnames(df)[colnames(df) == "X2022"] = "2022"
df
jurisdiction = df[['Jurisdiction']]
dtw_df <- df[, -1]
dtw_df
df_lst <- tslist(dtw_df)
remove_nan <- function(ts) {
ts[!is.na(ts)]
}
# Apply the function to each time series in the list
df_lst <- lapply(df_lst, remove_nan)
head(df_lst)
$`1`
[1] -11 -15 -15 -10 -12 -9 -9 -12 -9 -17 -12
$`2`
[1] -9 -13 -12 -11 -11 -11 -14 -14 -10 -11 -16
$`3`
[1] -10 -9 -11 -8 -7 -10 -9 -8 -10 -11 -9
$`4`
[1] -11 -9 -11 -10 -8 -10 -10 -13 -8 -12 -10
$`5`
[1] -8 -8 -9 -11 -9 -12 -10 -8 -10 -11 -5
$`6`
[1] -9 -12 -7 -9 -8 -6 -8 -14 -9 -11 -13
df_cvi <- list()
for (i in 2:10){
df_clust <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw_basic", centroid = "pam")
df_metric <- cvi(df_clust, type = "valid", log.base = 10)
df_cvi <- append(df_cvi, list(df_metric))
}
df_cvi_ma <- do.call(rbind, df_cvi)
rw <- c("K2","K3","K4","K5","K6","K7","K8","K9","K10")
rownames(df_cvi_ma) <- rw
print(df_cvi_ma)
Sil SF CH DB DBstar D COP
K2 0.22739686 2.873257e-13 19.323944 1.449895 1.449895 0.2000000 0.5396015
K3 0.09856450 0.000000e+00 20.386100 2.433388 2.554989 0.1086957 0.5149463
K4 0.08680188 0.000000e+00 8.970776 1.825314 1.995751 0.1956522 0.4544818
K5 0.09250820 0.000000e+00 8.031479 2.096970 2.256443 0.2432432 0.4249024
K6 0.04489434 0.000000e+00 7.441265 1.702436 1.844152 0.2162162 0.3907329
K7 0.05592296 0.000000e+00 8.467586 1.663985 1.751137 0.2352941 0.3776254
K8 0.02854510 0.000000e+00 5.119048 1.945333 2.135868 0.2162162 0.3844824
K9 0.04530116 0.000000e+00 6.647671 1.741463 1.858588 0.1250000 0.3495009
K10 0.03973649 0.000000e+00 5.500186 1.450124 1.678790 0.2424242 0.3544874
– “Sil” (!): Silhouette index (Rousseeuw (1987); to be maximized).-K4 – “SF” (~): Score Function (Saitta et al. (2007); to be maximized; see notes). – “CH” (~): Calinski-Harabasz index (Arbelaitz et al. (2013); to be maximized).-k3 – “DB” (?): Davies-Bouldin index (Arbelaitz et al. (2013); to be minimized).k4 – “DBstar” (?): Modified Davies-Bouldin index (DB*) (Kim and Ramakrishna (2005); to be minimized). -k4 – “D” (!): Dunn index (Arbelaitz et al. (2013); to be maximized). k5 – “COP” (!): COP index (Arbelaitz et al. (2013); to be minimized). k9
df_cvi2 <- list()
for (i in 1:100){
df_clust2 <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw_basic", centroid = "pam", seed=i)
df_metric2 <- cvi(df_clust2, type = "valid", log.base = 10)
df_cvi2 <- append(df_cvi2, list(df_metric2))
}
df_cvi_ma2 <- do.call(rbind, df_cvi2)
rw2 <- as.character(seq(1, 100))
rownames(df_cvi_ma2) <- rw2
print(df_cvi_ma2)
Sil SF CH DB DBstar D COP
1 0.02989322 0 8.267930 2.238655 2.404583 0.1777778 0.4748656
2 0.14825364 0 8.041492 1.297871 1.334043 0.2000000 0.4832321
3 0.03754318 0 8.487034 2.259797 2.407522 0.1818182 0.4592522
4 0.02995574 0 15.821127 1.528080 1.730725 0.1162791 0.4531435
5 0.04405823 0 10.642757 2.125626 2.358668 0.2105263 0.4517926
6 0.05016289 0 10.607177 1.960133 2.205713 0.1818182 0.4567662
7 0.09584990 0 9.586402 2.166614 2.404833 0.2432432 0.4420942
8 0.03057376 0 6.831763 2.070889 2.244613 0.2173913 0.4846111
9 0.09755957 0 8.651099 1.879694 1.995972 0.2222222 0.4785655
10 0.04848774 0 11.281703 2.370930 2.658352 0.1666667 0.4556404
11 0.06270603 0 8.779285 2.133016 2.272720 0.2325581 0.4691479
12 0.06574482 0 11.414286 1.768709 1.885482 0.2093023 0.4477900
13 0.02012785 0 12.706969 2.141722 2.213786 0.1538462 0.5099457
14 0.07981032 0 7.604167 1.733807 1.809972 0.1600000 0.4932457
15 0.06508202 0 7.738448 1.725743 1.916918 0.1666667 0.4926933
16 0.01617486 0 8.924327 1.642640 1.831837 0.2307692 0.4723811
17 0.04743631 0 16.351669 2.071868 2.152580 0.1351351 0.4526891
18 0.08792555 0 6.816154 1.855735 2.018651 0.2000000 0.5010613
19 0.04209015 0 10.571089 1.809844 1.899928 0.1777778 0.4563257
20 0.07360877 0 15.078350 1.603211 1.652207 0.2432432 0.4462433
21 0.05460444 0 14.327806 1.857663 1.908164 0.1136364 0.5174351
22 0.12847426 0 8.204271 1.256903 1.256903 0.2000000 0.4901734
23 0.08097232 0 10.646161 2.406364 2.523082 0.2093023 0.4406827
24 0.08616327 0 8.462604 1.944600 2.062539 0.2368421 0.4507423
25 0.03968365 0 13.937931 1.852789 1.928412 0.1777778 0.4493949
26 0.06714750 0 11.032864 1.815795 1.947428 0.2500000 0.4501209
27 0.04971675 0 12.628558 2.101338 2.318158 0.2093023 0.4458800
28 0.08251966 0 9.443823 2.205786 2.333487 0.2162162 0.4292892
29 0.07473192 0 9.058659 2.961499 3.067024 0.2162162 0.4247024
30 0.06142346 0 11.494935 2.058150 2.293132 0.2093023 0.4401212
31 0.10602197 0 10.272119 1.734337 1.804238 0.1860465 0.4464247
32 0.01002484 0 13.024347 1.812776 1.827359 0.1333333 0.4864582
33 0.15279801 0 7.833333 1.625978 1.694586 0.2000000 0.4932490
34 0.10041116 0 9.147943 1.510403 1.525943 0.2307692 0.4765613
35 0.06434506 0 7.177955 2.212042 2.411912 0.1600000 0.4838026
36 0.02639076 0 10.836788 2.363288 2.380773 0.2222222 0.4957626
37 0.08251283 0 7.812162 1.531213 1.691511 0.2368421 0.4921128
38 0.10573307 0 8.445816 1.298063 1.316445 0.2093023 0.5056321
39 0.17573381 0 7.311111 1.372446 1.384480 0.2000000 0.4956488
40 0.04959176 0 14.436639 1.613301 1.706437 0.1956522 0.4451929
41 0.04967502 0 10.547855 2.227850 2.378033 0.1777778 0.4506707
42 0.10136841 0 9.584050 1.690007 1.769669 0.2162162 0.4482273
43 0.07026515 0 13.322677 1.944705 1.957205 0.1333333 0.5019841
44 0.06324101 0 8.454007 2.337532 2.581534 0.1836735 0.4559222
45 0.05801835 0 7.984389 2.395908 2.678626 0.2000000 0.4712285
46 0.02212635 0 8.087387 2.416190 2.646503 0.2222222 0.4751925
47 0.05707356 0 7.389937 3.835284 4.107968 0.1219512 0.4785162
48 0.08649699 0 8.387425 1.815461 1.895862 0.2000000 0.4873304
49 0.04498618 0 14.412483 1.702336 1.702336 0.1219512 0.4916851
50 0.06485977 0 11.438447 2.485559 2.692235 0.1860465 0.4491978
51 0.08094744 0 9.858641 2.132333 2.254781 0.2325581 0.4725768
52 0.04841535 0 8.855072 1.780709 1.920143 0.1818182 0.4645107
53 0.07847850 0 9.786119 1.874647 2.021041 0.2162162 0.4527335
54 0.04947414 0 10.605805 2.189256 2.277582 0.1818182 0.4625581
55 0.07683922 0 9.821544 2.482266 2.670724 0.2162162 0.4342431
56 0.03043748 0 10.392739 1.852975 1.970331 0.1777778 0.4600021
57 0.10723033 0 8.234209 2.222238 2.472416 0.2272727 0.4778762
58 0.13847920 0 7.582754 1.583476 1.617272 0.2432432 0.4849104
59 0.05146238 0 10.312236 2.131660 2.384184 0.1818182 0.4556719
60 0.07351206 0 9.235996 2.617606 2.698928 0.1818182 0.4454483
61 0.05778215 0 8.482505 1.910649 2.017777 0.1860465 0.4561495
62 0.10107278 0 9.755961 1.832484 1.912503 0.1860465 0.4561851
63 0.02245966 0 12.427395 2.297175 2.504672 0.1250000 0.5028415
64 0.07778999 0 10.125751 2.479836 2.709723 0.2162162 0.4400780
65 0.13696355 0 6.588584 1.531025 1.542128 0.2000000 0.5040162
66 0.10459486 0 10.811573 1.753192 1.832836 0.1860465 0.4425003
67 0.05384217 0 12.881954 2.451383 2.610286 0.1388889 0.4350476
68 0.08727954 0 8.890125 1.614515 1.689351 0.2368421 0.4710802
69 0.12816386 0 7.577203 1.429298 1.479647 0.2000000 0.4828737
70 0.08499898 0 11.276434 1.822896 1.936829 0.2432432 0.4535908
71 0.07825209 0 8.887194 1.690115 1.752291 0.2000000 0.4732470
72 0.05796473 0 12.096515 2.229516 2.292315 0.1509434 0.4679609
73 0.08084652 0 9.272557 1.828571 1.971379 0.2045455 0.4628321
74 0.06507073 0 9.387141 2.017291 2.222505 0.2040816 0.4692219
75 0.07401917 0 8.834904 1.857543 1.973086 0.1777778 0.4766310
76 0.05864206 0 17.001397 1.771750 1.879890 0.1162791 0.4683635
77 0.04437851 0 8.578351 2.046981 2.237899 0.2105263 0.4753405
78 0.08793849 0 11.461760 1.929488 2.023072 0.2093023 0.4495606
79 0.05138203 0 7.716100 1.823370 2.046671 0.2045455 0.4775293
80 0.10509382 0 8.775106 1.909397 1.983173 0.2093023 0.4575455
81 0.08050766 0 9.417480 1.876423 1.973351 0.1739130 0.4513057
82 0.06750718 0 13.858974 1.703281 1.845541 0.2093023 0.4400402
83 0.07751424 0 9.885446 1.811177 1.945118 0.2222222 0.4444048
84 0.12156405 0 6.009700 1.498979 1.545998 0.2000000 0.5360617
85 0.06814069 0 9.352262 1.739228 1.754712 0.2307692 0.4697838
86 0.10396097 0 9.338041 1.624344 1.681597 0.2162162 0.4571615
87 0.13089849 0 7.113397 1.887321 1.894423 0.1777778 0.4956051
88 0.06119961 0 15.048526 2.158281 2.216754 0.1250000 0.4685697
89 0.05661194 0 13.795078 2.052063 2.234833 0.1956522 0.4999446
90 0.04947414 0 10.605805 2.189256 2.277582 0.1818182 0.4625581
91 0.04978613 0 13.198487 1.475665 1.529105 0.1041667 0.4818063
92 0.08100250 0 7.448087 1.769583 1.845382 0.2368421 0.4798500
93 0.05111766 0 10.340888 2.001386 2.191069 0.1818182 0.4572010
94 0.09725818 0 9.869115 1.841106 1.915841 0.2162162 0.4479901
95 0.07750829 0 9.824859 1.903319 2.020924 0.2307692 0.4654557
96 0.06922169 0 8.427586 2.030559 2.185746 0.1632653 0.4436718
97 0.05856118 0 15.264957 2.340547 2.409841 0.1111111 0.4860987
98 0.04359991 0 7.544004 2.739786 3.000864 0.2162162 0.4519621
99 0.04105839 0 9.475300 2.308040 2.409135 0.2272727 0.4719360
100 0.08156770 0 10.052778 1.911313 1.984888 0.2105263 0.4467901
for (i in 2:10){df_clust_opt <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw", centroid = "pam",seed = 725)
plot(df_clust_opt)}
for (i in 1:10){df_clust_opt_final <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_final)}
# for (i in 11:20){df_clust_opt_final <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
# plot(df_clust_opt_final)}
#k4
df_clust_opt_final <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = 5)
plot(df_clust_opt_final)
# Extract cluster assignments
cluster_assignments <- df_clust_opt_final@cluster
# Determine the number of clusters
num_clusters <- max(cluster_assignments)
# Loop through each cluster and print the jurisdictions in it
for (cluster_number in 1:num_clusters) {
cat("Jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Print the jurisdictions corresponding to these indices
print(jurisdiction[indices_in_cluster])
cat("\n") # Add a newline for readability
}
Jurisdictions in Cluster 1 :
[1] "California" "Colorado" "Delaware" "Florida" "Iowa" "Louisiana" "Missouri" "Nebraska"
[9] "Nevada" "New Jersey" "New Mexico" "North Dakota" "Ohio" "Oklahoma" "Rhode Island" "South Dakota"
[17] "Tennessee" "Virginia"
Jurisdictions in Cluster 2 :
[1] "Alabama" "Alaska" "Connecticut" "Hawaii" "Maine" "North Carolina" "Utah"
[8] "Vermont" "Washington" "West Virginia" "Wisconsin" "Wyoming"
Jurisdictions in Cluster 3 :
[1] "Arizona" "Arkansas" "Georgia" "Idaho" "Illinois" "Indiana" "Kentucky"
[8] "Maryland" "Massachusetts" "Michigan" "Minnesota" "Mississippi" "Montana" "National"
[15] "New Hampshire" "New York" "Oregon" "South Carolina"
Jurisdictions in Cluster 4 :
[1] "Kansas" "Pennsylvania" "Texas"
# Create an empty dataframe to store the results
jurisdiction_clusters <- data.frame(Jurisdiction = character(), G8_Cluster = numeric(), stringsAsFactors = FALSE)
# Loop through each cluster and append the jurisdictions and their cluster number to the dataframe
for (cluster_number in 1:num_clusters) {
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Extract the jurisdictions corresponding to these indices
jurisdictions_in_cluster <- jurisdiction[indices_in_cluster]
# Create a temporary dataframe for this cluster
temp_df <- data.frame(Jurisdiction = jurisdictions_in_cluster, G8_Cluster = rep(cluster_number, length(jurisdictions_in_cluster)), stringsAsFactors = FALSE)
# Append the temporary dataframe to the main dataframe
jurisdiction_clusters <- rbind(jurisdiction_clusters, temp_df)
}
# Export the dataframe to a CSV file
write.csv(jurisdiction_clusters, "../clean_data/jurisdiction_clusters_G8.csv", row.names = FALSE)
# View the resulting dataframe
print(jurisdiction_clusters)
# Load necessary libraries
library(ggplot2)
library(reshape2)
# Loop through each cluster
for (cluster_number in 1:num_clusters) {
cat("Plotting jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Get the names of the jurisdictions in this cluster
jurisdictions_in_cluster <- jurisdiction[indices_in_cluster]
# Filter the gap_list_year data frame for these jurisdictions
cluster_data <- df[df$Jurisdiction %in% jurisdictions_in_cluster, ]
# Convert the data to long format for ggplot
long_df <- melt(cluster_data, id.vars = "Jurisdiction", variable.name = "Year", value.name = "Value")
# Plot
p <- ggplot(long_df, aes(x = Year, y = Value, group = Jurisdiction, color = Jurisdiction)) +
geom_line() +
labs(title = paste("Cluster", cluster_number), x = "Year", y = "Gap") +
theme(legend.position = "right")
print(p)
#ggsave(paste("cluster_", cluster_number, ".png", sep=""), plot = p)
}
Plotting jurisdictions in Cluster 1 :
Plotting jurisdictions in Cluster 2 :
Plotting jurisdictions in Cluster 3 :
Plotting jurisdictions in Cluster 4 :